/* Notes

The descriptive statistics are based on version 4-0-0 of the TwinLife dataset. Access can be requested through GESIS here: https://search.gesis.org/research_data/ZA6701

The descriptive stats are only based on Wave 1 data, as are the twin models.

The Stata code for the averages in Table 1 is available in the "Table 1 calculations.do" file

*/

* ---------------------------------------------------------------------------
* Load the wave-1 person file, keep the variables used below and restrict
* the sample to twins and parents with a zygosity code.
* ---------------------------------------------------------------------------

* Set working directory to location of version 4-0-0 data
cd "C:\Users\aleks\Desktop\ZA6701_TwinLife_v4-0\Stata"

* F2F 1
clear
use ZA6701_person_wid1_v4-0-0

* Identifiers, politics (pop*), self-esteem (ses*), sop* items, zygosity,
* sex/age and the remaining variables carried along
keep fid pid ptyp pop*                                ///
	ses0100 ses0101 ses0102                           ///
	sop010* sop0150 sop0200                           ///
	zyg0102 eca0106 sex age0100 age0101 age0200       ///
	hoe* iva* wid liv0410

* Keep twins and parents only
keep if ptyp<3 | inlist(ptyp, 300, 400)

* Drop if zygosity is missing (codes -92 and -95)
drop if inlist(zyg0102, -92, -95)

* Remove missing values: every negative value, in every variable, is set to
* system missing
foreach v of varlist _all {
	replace `v' = . if `v' < 0
}

*** Clean variables
* `v1' is the prefix of the raw variable names (empty here); `pre1' is the
* prefix given to every derived variable. The derived variables are created
* in a fixed order because a later step refers to them as a variable range.
local v1=""
local pre1="w1_"

forvalues num = 1/1 {

	local raw "`v`num''"
	local out "`pre`num''"

	* pop0100 - interest in politics - four categories
	tabulate `raw'pop0100
	generate `out'interest = `raw'pop0100

	* Dichotomous participation items; the value 2 is recoded to 0
	*   pop0200 - attending political meetings/discussions/protest
	*   pop0201 - participating in (online) petitions
	*   pop0202 - boycott of companies/products
	local items  "pop0200 pop0201 pop0202"
	local labels "meeting petition boycott"
	forvalues k = 1/3 {
		local src : word `k' of `items'
		local dst : word `k' of `labels'
		tabulate `raw'`src'
		generate `out'part_`dst' = `raw'`src'
		recode `out'part_`dst' (2=0)
	}

	* pop0300 - participation in last parliamentary election
	* 3 was "not eligible" and becomes missing; 2 becomes 0
	tabulate `raw'pop0300
	generate `out'part_vote = `raw'pop0300
	recode `out'part_vote (3=.) (2=0)

	* pop0301 - hypothetical participation in election
	tabulate `raw'pop0301
	generate `out'part_votehyp = `raw'pop0301
	recode `out'part_votehyp (2=0)

	* pop0302 - political leanings party
	* Temporary rough coding (raw code in parentheses):
	*   1. Left         - Die Linke (6)
	*   2. Center left  - SPD (2); Greens (5)
	*   3. Center       - Piratenpartei (8)
	*   4. Center right - CDU/CSU (3); FDP (4)
	*   5. Right        - AfD (9); NPD/Republikaner (7)
	tabulate `raw'pop0302
	generate `out'party_ideo = `raw'pop0302
	* 10 is "Other", but unsure where listed
	recode `out'party_ideo (10=.)

	* party_none: 1 when the raw code is 1, 0 for codes 2 through 12
	generate `out'party_none = cond(`out'party_ideo==1, 1, cond(inrange(`out'party_ideo, 2, 12), 0, .))

	* Collapse raw party codes onto the 1-5 scale above; code 1 becomes missing.
	* CDU/CSU is 3 at wave 1, split into 11 and 12 at wave 2
	recode `out'party_ideo (1=.) (6=1) (2 5=2) (8=3) (3 4 11 12=4) (7 9=5)

	* party_comb: same scale, with non-partisans put at the center (3)
	generate `out'party_comb = cond(`out'party_none==1, 3, `out'party_ideo)

	* sop0106 - participation political organization/party
	* Scale is reversed and shifted: 4 -> 0, 3 -> 1, 2 -> 2, 1 -> 3
	tabulate `raw'sop0106
	generate `out'part_politorgfreq = `raw'sop0106
	recode `out'part_politorgfreq (4=0) (3=1) (2=2) (1=3)

}

* sex - the value 2 is recoded to 0
recode sex 2=0

* age categories
* agecat is built from age0200 with upper cut-offs 6, 12, 18 and 25. Each
* replace only fills observations that are still missing, so the bands do not
* overlap. A missing age0200 never satisfies "<=" and therefore stays missing.

gen agecat = 0 if age0200<=6
replace agecat = 1 if age0200<=12 & agecat==.
replace agecat = 2 if age0200<=18 & agecat==.
replace agecat = 3 if age0200<=25 & agecat==.
* Blank agecat for person types above 3 (the 300/400 parent codes)
replace agecat = . if ptyp>3

* self-esteem
* Mean of the three ses items, with ses0100 reversed as (6 - ses0100).
* NOTE(review): the reversal presumes a 1-5 response scale - confirm against
* the codebook.

alpha ses0100-ses0102
gen w1_selfesteem = (ses0101 + ses0102 + (6-ses0100))/3

* parental household - live at home
* Defined only for ptyp<=2 with a known age0100 of 18 or more: 1 when
* liv0410==2, 0 for the other liv0410 codes in 1-5.
* Stata treats missing as larger than any number, so "age0100>=18" on its own
* is also true when age0100 is missing; the !missing() guard keeps people of
* unknown age out of the indicator.

gen w1_parentalhousehold = 1 if liv0410==2 & ptyp<=2 & age0100>=18 & !missing(age0100)
replace w1_parentalhousehold = 0 if liv0410>0 & liv0410!=2 & liv0410<=5 & ptyp<=2 & age0100>=18 & !missing(age0100)

* Calculate parental variables
* For each listed variable, the value observed on the family's ptyp==300
* record is spread to all family members as m_<var>, and the value on the
* ptyp==400 record as f_<var>. Families without such a record get missing.

keep sex fid ptyp zyg0102 agecat w1_* ses* sop*

sort fid ptyp

foreach var of varlist w1_part* w1_interest sop010*t sop010*u  {
	foreach parent in m f {
		* m -> person type 300, f -> person type 400
		local code = cond("`parent'"=="m", 300, 400)

		* Step 1: within-family mean over that parent's record(s) only
		tempvar own
		by fid: egen `own' = mean(`var') if ptyp==`code'

		* Step 2: copy it to every record of the family; the missing option
		* keeps the result missing when no such parent record exists
		by fid: egen `parent'_`var' = total(`own'), missing
		drop `own'
	}
}

* The parental copies of the sop items are not used further
drop m_sop* f_sop*

* Parent records are no longer needed once their values are attached
drop if ptyp>3

rename m_w1_interest w1_m_interest
rename f_w1_interest w1_f_interest

* Zygosity and parental interest

rename zyg0102 zyg

* r is 1 when zyg==1 and 0.5 when zyg==2; any other zyg value leaves it missing
gen r = cond(zyg==1, 1, cond(zyg==2, 0.5, .))

gen mz = zyg

* mzsex: 1 = mzm, 2 = dzm, 3 = mzf, 4 = dzf
* (mz in {1,2} gives the base code; sex==0 shifts it up by two)
gen mzsex = mz + 2*(1 - sex) if inlist(mz, 1, 2) & inlist(sex, 0, 1)

* mzage: 1 = mz1, 2 = dz1, 3 = mz2, 4 = dz2, 5 = mz3, 6 = dz3
* (each further age category shifts the code up by two; agecat 0 stays missing)
gen mzage = mz + 2*(agecat - 1) if inlist(mz, 1, 2) & inlist(agecat, 1, 2, 3)

* Parental interest: average of mother and father when both are observed,
* otherwise whichever one is available
gen w1_p_interest = cond(!missing(w1_m_interest, w1_f_interest),      ///
	(w1_m_interest + w1_f_interest)/2,                                ///
	cond(missing(w1_m_interest), w1_f_interest, w1_m_interest))

* Two-group split around the sample mean: 0 below, 1 above.
* A value exactly equal to the mean is left missing.
qui sum w1_p_interest
gen w1_p_interest2 = cond(missing(w1_p_interest), .,                  ///
	cond(w1_p_interest<r(mean), 0,                                    ///
	cond(w1_p_interest>r(mean), 1, .)))

* Three-group split at mean -/+ one standard deviation: 0 below, 1 between,
* 2 above. A value exactly equal to either cut-off is left missing.
qui sum w1_p_interest
local lo_cut = r(mean)-r(sd)
local hi_cut = r(mean)+r(sd)
gen w1_p_interest3 = cond(missing(w1_p_interest), .,                  ///
	cond(w1_p_interest<`lo_cut', 0,                                   ///
	cond(w1_p_interest>`hi_cut', 2,                                   ///
	cond(w1_p_interest>`lo_cut' & w1_p_interest<`hi_cut', 1, .))))

* Long-format export (one row per twin)
outsheet using twinlife_table1_long.csv, comma nolabel replace

* Reshape data
* Twin-level variables get a trailing underscore so that reshape can append
* the ptyp value to it, giving <name>_1 and <name>_2.

* Give sop0100 through sop0106 the w1_ prefix used by the derived variables
forvalues i = 0/6 {
	rename sop010`i' w1_sop010`i'
}

* Mark every variable that differs between the twins with the "_" stub suffix
foreach stub of varlist w1_interest-w1_part_politorgfreq w*_parentalhousehold w*_sop010* w*_selfesteem {
	rename `stub' `stub'_
}

* Remaining raw ses/sop variables are not carried into the wide file
capture drop ses* sop*

* One row per family (fid); ptyp becomes the column suffix
reshape wide *_, i(fid) j(ptyp)

* Randomize Twin 1 and Twin 2
* Each family draws a standard normal number; rand is 1 when the draw is
* positive and 0 otherwise. In families with rand==1, every pair of reshaped
* variables <stem>1 / <stem>2 has its two values exchanged. The fixed seed
* makes the assignment reproducible.

set seed 08092018
capture drop rand
gen rand = rnormal()
replace rand = (rand>0)

* Only variables carrying the twin-1 suffix "_1" are looped over, so another
* variable whose name merely ends in "1" cannot be picked up by mistake
foreach var of varlist *_1 {

	* Strip just the trailing twin index; the stem keeps its final "_".
	* Working from the end of the name means an "_1" elsewhere in the name
	* is never touched.
	local stem = substr("`var'", 1, length("`var'") - 1)
	display "`stem'"

	* Three-step swap through a temporary copy of the twin-1 value
	tempvar hold
	gen `hold' = `var' if rand==1
	replace `var' = `stem'2 if rand==1
	replace `stem'2 = `hold' if rand==1
	drop `hold'

}

* Wide-format export (one row per family)
outsheet using twinlife_table1.csv, comma nolabel replace
